JSON 是最常見的資料交換格式。
然而,確保 JSON 資料符合預期的結構和類型是一個重要的挑戰。
今天我們將實作一個 JSON Schema 驗證器,它可以根據預定義的 schema 來驗證 JSON 資料的正確性。
cargo new json_schema_validator
cd json_schema_validator
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "1.0"
regex = "1.10"
use thiserror::Error;
/// Errors reported when a JSON value does not satisfy a schema.
///
/// The `#[error]` strings are the user-facing messages produced by `Display`.
#[derive(Error, Debug)]
pub enum ValidationError {
/// The value's JSON type differs from the type the schema asks for.
/// Both fields are textual descriptions of the types involved.
#[error("Type mismatch: expected {expected}, got {got}")]
TypeMismatch { expected: String, got: String },
/// An object lacks a property listed in the schema's `required` array.
#[error("Required property missing: {0}")]
MissingProperty(String),
/// A number is below `minimum` (also used for `exclusiveMinimum` violations).
#[error("Value {value} is less than minimum {minimum}")]
BelowMinimum { value: f64, minimum: f64 },
/// A number is above `maximum` (also used for `exclusiveMaximum` violations).
#[error("Value {value} is greater than maximum {maximum}")]
AboveMaximum { value: f64, maximum: f64 },
/// A string is shorter than `minLength`.
#[error("String length {length} is less than minLength {min_length}")]
StringTooShort { length: usize, min_length: usize },
/// A string is longer than `maxLength`.
#[error("String length {length} is greater than maxLength {max_length}")]
StringTooLong { length: usize, max_length: usize },
/// An array has fewer elements than `minItems`.
#[error("Array length {length} is less than minItems {min_items}")]
ArrayTooShort { length: usize, min_items: usize },
/// An array has more elements than `maxItems`.
#[error("Array length {length} is greater than maxItems {max_items}")]
ArrayTooLong { length: usize, max_items: usize },
/// A string does not match the schema's `pattern`.
/// NOTE(review): the validator also returns this when the pattern itself
/// fails to compile as a regex.
#[error("Pattern mismatch: value does not match pattern {pattern}")]
PatternMismatch { pattern: String },
/// The value is not one of the schema's `enum` values.
/// NOTE(review): the validator also reuses this variant (wrapped in
/// `NestedError`) for `uniqueItems` violations.
#[error("Enum mismatch: value not in allowed values")]
EnumMismatch,
/// An object contains a key that is not declared in `properties` while
/// `additionalProperties` is `false`.
#[error("Additional property not allowed: {0}")]
AdditionalPropertyNotAllowed(String),
/// Wraps another error together with the path (e.g. `user.tags[0]`) of the
/// property or array element it was raised for.
#[error("Validation error at {path}: {error}")]
NestedError { path: String, error: Box<ValidationError> },
}
/// Outcome of a validation step: `Ok(())` when the data passes, otherwise the
/// `ValidationError` describing the failure.
pub type ValidationResult = Result<(), ValidationError>;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
/// A subset of JSON Schema keywords, deserialized from a schema document.
///
/// Field names map to camelCase JSON keys (e.g. `min_length` <-> `minLength`).
/// Every keyword is optional; an absent keyword imposes no constraint.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Schema {
/// The `type` keyword: the JSON type the value must have.
#[serde(rename = "type")]
pub schema_type: Option<SchemaType>,
/// Per-property sub-schemas for object values, keyed by property name.
pub properties: Option<HashMap<String, Schema>>,
/// Names of properties that must be present on an object value.
pub required: Option<Vec<String>>,
/// Whether keys not listed in `properties` are allowed; the validator
/// treats a missing value as `true`.
pub additional_properties: Option<bool>,
// Numeric validation
/// Inclusive lower bound.
pub minimum: Option<f64>,
/// Inclusive upper bound.
pub maximum: Option<f64>,
/// Exclusive lower bound (the value must be strictly greater).
pub exclusive_minimum: Option<f64>,
/// Exclusive upper bound (the value must be strictly smaller).
pub exclusive_maximum: Option<f64>,
// String validation
/// Minimum string length.
pub min_length: Option<usize>,
/// Maximum string length.
pub max_length: Option<usize>,
/// Regular expression the string must match (compiled with the `regex` crate).
pub pattern: Option<String>,
// Array validation
/// Schema applied to every element of an array value.
pub items: Option<Box<Schema>>,
/// Minimum number of array elements.
pub min_items: Option<usize>,
/// Maximum number of array elements.
pub max_items: Option<usize>,
/// When `true`, array elements must be pairwise distinct.
pub unique_items: Option<bool>,
// Enum validation
/// The `enum` keyword: the value must equal one of these JSON values.
#[serde(rename = "enum")]
pub enum_values: Option<Vec<Value>>,
}
/// JSON types that a schema's `type` keyword can name.
///
/// Variants are (de)serialized as lowercase strings such as `"string"` or
/// `"integer"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum SchemaType {
String,
Number,
Integer,
Boolean,
Object,
Array,
Null,
}
use regex::Regex;
/// Validates JSON values against a single root `Schema`.
pub struct Validator {
// Root schema that `validate` checks data against.
schema: Schema,
}
impl Validator {
    /// Creates a validator that checks data against `schema`.
    pub fn new(schema: Schema) -> Self {
        Self { schema }
    }

    /// Parses `schema_str` as a JSON schema document and builds a validator.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json::Error` raised when `schema_str` cannot be
    /// deserialized into a [`Schema`].
    pub fn from_str(schema_str: &str) -> Result<Self, serde_json::Error> {
        let schema: Schema = serde_json::from_str(schema_str)?;
        Ok(Self::new(schema))
    }

    /// Validates `data` against the root schema.
    ///
    /// # Errors
    ///
    /// Returns the first [`ValidationError`] encountered; validation stops at
    /// the first failure.
    pub fn validate(&self, data: &Value) -> ValidationResult {
        self.validate_with_schema(data, &self.schema, "")
    }

    /// Validates `data` against `schema`; `path` locates `data` inside the
    /// document (empty for the root) and is used for nested error reporting.
    fn validate_with_schema(
        &self,
        data: &Value,
        schema: &Schema,
        path: &str,
    ) -> ValidationResult {
        // Check the declared type first so later checks see the expected kind.
        if let Some(schema_type) = &schema.schema_type {
            self.validate_type(data, schema_type, path)?;
        }

        // Keyword checks that depend on the kind of value actually present.
        match data {
            Value::Object(obj) => self.validate_object(obj, schema, path)?,
            Value::Array(arr) => self.validate_array(arr, schema, path)?,
            Value::String(s) => self.validate_string(s, schema, path)?,
            Value::Number(n) => self.validate_number(n, schema, path)?,
            _ => {}
        }

        // `enum` applies to every kind of value.
        if let Some(enum_values) = &schema.enum_values {
            if !enum_values.contains(data) {
                return Err(ValidationError::EnumMismatch);
            }
        }

        Ok(())
    }

    /// Checks that the JSON type of `data` is compatible with `schema_type`.
    ///
    /// An integer value satisfies `number`; a number with a zero fractional
    /// part (e.g. `25.0`) satisfies `integer`.
    fn validate_type(
        &self,
        data: &Value,
        schema_type: &SchemaType,
        _path: &str,
    ) -> ValidationResult {
        let actual_type = match data {
            Value::Null => SchemaType::Null,
            Value::Bool(_) => SchemaType::Boolean,
            Value::Number(n) => {
                // JSON Schema defines "integer" as any number whose fractional
                // part is zero, so `1.0` counts even though serde_json stores
                // it as a float.
                let is_integral = n.is_i64()
                    || n.is_u64()
                    || matches!(n.as_f64(), Some(f) if f.fract() == 0.0);
                if is_integral {
                    SchemaType::Integer
                } else {
                    SchemaType::Number
                }
            }
            Value::String(_) => SchemaType::String,
            Value::Array(_) => SchemaType::Array,
            Value::Object(_) => SchemaType::Object,
        };

        let type_matches = match schema_type {
            // Every integer is also a number.
            SchemaType::Number => {
                matches!(actual_type, SchemaType::Number | SchemaType::Integer)
            }
            _ => schema_type == &actual_type,
        };

        if !type_matches {
            return Err(ValidationError::TypeMismatch {
                expected: format!("{:?}", schema_type),
                got: format!("{:?}", actual_type),
            });
        }
        Ok(())
    }

    /// Applies `required`, `properties` and `additionalProperties` to an object.
    fn validate_object(
        &self,
        obj: &serde_json::Map<String, Value>,
        schema: &Schema,
        path: &str,
    ) -> ValidationResult {
        // Required properties.
        if let Some(required) = &schema.required {
            for prop in required {
                if !obj.contains_key(prop) {
                    return Err(ValidationError::MissingProperty(prop.clone()));
                }
            }
        }

        // `additionalProperties` must be honoured even when the schema has no
        // `properties` map: in that case every key is "additional".
        let properties = schema.properties.as_ref();
        let allow_additional = schema.additional_properties.unwrap_or(true);

        for (key, value) in obj {
            match properties.and_then(|props| props.get(key)) {
                Some(prop_schema) => {
                    let new_path = if path.is_empty() {
                        key.clone()
                    } else {
                        format!("{}.{}", path, key)
                    };
                    self.validate_with_schema(value, prop_schema, &new_path)
                        .map_err(|e| ValidationError::NestedError {
                            path: new_path,
                            error: Box::new(e),
                        })?;
                }
                None if !allow_additional => {
                    return Err(ValidationError::AdditionalPropertyNotAllowed(
                        key.clone(),
                    ));
                }
                None => {}
            }
        }

        Ok(())
    }

    /// Applies `minItems`, `maxItems`, `uniqueItems` and `items` to an array.
    fn validate_array(
        &self,
        arr: &[Value],
        schema: &Schema,
        path: &str,
    ) -> ValidationResult {
        // Length bounds.
        if let Some(min_items) = schema.min_items {
            if arr.len() < min_items {
                return Err(ValidationError::ArrayTooShort {
                    length: arr.len(),
                    min_items,
                });
            }
        }
        if let Some(max_items) = schema.max_items {
            if arr.len() > max_items {
                return Err(ValidationError::ArrayTooLong {
                    length: arr.len(),
                    max_items,
                });
            }
        }

        // Uniqueness: `Value` is not hashable, so elements are compared via
        // their compact JSON text (the `Display` output of `Value`).
        if schema.unique_items.unwrap_or(false) {
            let mut seen = std::collections::HashSet::with_capacity(arr.len());
            for item in arr {
                if !seen.insert(item.to_string()) {
                    // NOTE(review): there is no dedicated "duplicate item"
                    // variant, so duplicates are reported as `EnumMismatch`.
                    return Err(ValidationError::NestedError {
                        path: path.to_string(),
                        error: Box::new(ValidationError::EnumMismatch),
                    });
                }
            }
        }

        // Element schema.
        if let Some(items_schema) = &schema.items {
            for (i, item) in arr.iter().enumerate() {
                let new_path = format!("{}[{}]", path, i);
                self.validate_with_schema(item, items_schema, &new_path)
                    .map_err(|e| ValidationError::NestedError {
                        path: new_path,
                        error: Box::new(e),
                    })?;
            }
        }

        Ok(())
    }

    /// Applies `minLength`, `maxLength` and `pattern` to a string.
    ///
    /// Lengths are counted in Unicode scalar values (characters), not UTF-8
    /// bytes, so multi-byte text such as CJK is measured as JSON Schema
    /// specifies.
    fn validate_string(
        &self,
        s: &str,
        schema: &Schema,
        _path: &str,
    ) -> ValidationResult {
        // Only walk the string when a length keyword is actually present.
        if schema.min_length.is_some() || schema.max_length.is_some() {
            let length = s.chars().count();
            if let Some(min_length) = schema.min_length {
                if length < min_length {
                    return Err(ValidationError::StringTooShort { length, min_length });
                }
            }
            if let Some(max_length) = schema.max_length {
                if length > max_length {
                    return Err(ValidationError::StringTooLong { length, max_length });
                }
            }
        }

        // Regular expression.
        if let Some(pattern) = &schema.pattern {
            // NOTE(review): a pattern that fails to compile is reported as a
            // mismatch because there is no dedicated error variant; the regex
            // is also recompiled on every call.
            let regex = Regex::new(pattern).map_err(|_| {
                ValidationError::PatternMismatch {
                    pattern: pattern.clone(),
                }
            })?;
            if !regex.is_match(s) {
                return Err(ValidationError::PatternMismatch {
                    pattern: pattern.clone(),
                });
            }
        }

        Ok(())
    }

    /// Applies `minimum`, `exclusiveMinimum`, `maximum` and `exclusiveMaximum`
    /// to a number. All comparisons are done in `f64`.
    fn validate_number(
        &self,
        n: &serde_json::Number,
        schema: &Schema,
        _path: &str,
    ) -> ValidationResult {
        // `as_f64` is documented as fallible, so report an error instead of
        // panicking on a number that has no `f64` representation.
        let Some(value) = n.as_f64() else {
            return Err(ValidationError::TypeMismatch {
                expected: "number representable as f64".to_string(),
                got: n.to_string(),
            });
        };

        // Lower bounds.
        if let Some(minimum) = schema.minimum {
            if value < minimum {
                return Err(ValidationError::BelowMinimum { value, minimum });
            }
        }
        if let Some(exclusive_minimum) = schema.exclusive_minimum {
            if value <= exclusive_minimum {
                return Err(ValidationError::BelowMinimum {
                    value,
                    minimum: exclusive_minimum,
                });
            }
        }

        // Upper bounds.
        if let Some(maximum) = schema.maximum {
            if value > maximum {
                return Err(ValidationError::AboveMaximum { value, maximum });
            }
        }
        if let Some(exclusive_maximum) = schema.exclusive_maximum {
            if value >= exclusive_maximum {
                return Err(ValidationError::AboveMaximum {
                    value,
                    maximum: exclusive_maximum,
                });
            }
        }

        Ok(())
    }
}
use std::fs;
use std::path::PathBuf;
/// Command-line entry point: `<program> <schema.json> <data.json>`.
///
/// Reads both files, parses them, validates the data against the schema and
/// prints the outcome. Exits with status 1 on a usage error, an I/O or parse
/// failure, or a validation failure.
fn main() {
    let args: Vec<String> = std::env::args().collect();

    // Exactly two operands are expected after the program name. The slice
    // pattern avoids indexing `args[0]`, which would panic if argv is empty.
    let [_, schema_arg, data_arg] = args.as_slice() else {
        let program = args
            .first()
            .map(String::as_str)
            .unwrap_or("json_schema_validator");
        eprintln!("Usage: {} <schema.json> <data.json>", program);
        std::process::exit(1);
    };

    let schema_path = PathBuf::from(schema_arg);
    let data_path = PathBuf::from(data_arg);

    // Read the schema file.
    let schema_content = fs::read_to_string(&schema_path).unwrap_or_else(|e| {
        eprintln!("Error reading schema file: {}", e);
        std::process::exit(1);
    });

    // Read the data file.
    let data_content = fs::read_to_string(&data_path).unwrap_or_else(|e| {
        eprintln!("Error reading data file: {}", e);
        std::process::exit(1);
    });

    // Parse the data document.
    let data: serde_json::Value = serde_json::from_str(&data_content).unwrap_or_else(|e| {
        eprintln!("Error parsing data JSON: {}", e);
        std::process::exit(1);
    });

    // Build the validator from the schema document.
    let validator = Validator::from_str(&schema_content).unwrap_or_else(|e| {
        eprintln!("Error parsing schema JSON: {}", e);
        std::process::exit(1);
    });

    // Run the validation and report the result.
    match validator.validate(&data) {
        Ok(()) => {
            println!("✓ Validation successful!");
            println!("Data conforms to the schema.");
        }
        Err(e) => {
            eprintln!("✗ Validation failed:");
            eprintln!("{}", e);
            std::process::exit(1);
        }
    }
}
一樣先建立一些假資料
user_schema.json
{
"type": "object",
"properties": {
"username": {
"type": "string",
"minLength": 3,
"maxLength": 20,
"pattern": "^[a-zA-Z0-9_]+$"
},
"email": {
"type": "string",
"pattern": "^[^@]+@[^@]+\\.[^@]+$"
},
"age": {
"type": "integer",
"minimum": 0,
"maximum": 150
},
"status": {
"type": "string",
"enum": ["active", "inactive", "pending"]
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"maxItems": 10,
"uniqueItems": true
}
},
"required": ["username", "email", "age"]
}
有效驗證
valid_user.json
{
"username": "john_doe",
"email": "john@example.com",
"age": 25,
"status": "active",
"tags": ["developer", "rust"]
}
無效驗證
invalid_user.json
{
"username": "jo",
"email": "invalid-email",
"age": 200
}
# 驗證有效資料
cargo run user_schema.json valid_user.json
# 輸出: ✓ Validation successful!
# 驗證無效資料
cargo run user_schema.json invalid_user.json
# 輸出: ✗ Validation failed: ...
好的!